library(ggplot2)
library(gridExtra)
library(dplyr)
library(janitor)
library(stringr)
library(miceadds)


library("rstudioapi")     
setwd(dirname(getActiveDocumentContext()$path))
getwd()#

source('ri_functions.R')

# get the data
load("Data/FinalFinal20062023Full1-6.RData")
load("Data/FinalFinalSP20062023Full1-6.RData")

final_finalV2$individual_treatmentRI = ifelse(final_finalV2$individual_treatment == "CDC Health", "CDCHealth", 
                                              ifelse(final_finalV2$individual_treatment == "Placebo", "Placebo",
                                                     ifelse(final_finalV2$individual_treatment == "High Cash", "HighCash", 
                                                            ifelse(final_finalV2$individual_treatment == "Low Cash", "LowCash", NA ))))
final_finalV2$individual_treatmentRI = relevel(as.factor(final_finalV2$individual_treatmentRI), ref = "Placebo")

final_finalV2 = mutate(final_finalV2, 
                       Agec = (Age - 30)/10, # per 10 year age
                       Incomec = (Income - 100)/50, # per 50 income
                       clinic_distancec = (clinic_distance - 5)/5, # per 5
                       SNMetricc = SNMetric/10) %>%
  clean_names() %>%
  select(starts_with('vaccine_'), 'act_vac_april', cash_cat, gender, agec, education, incomec, employed, individual_treatment_ri,
         clinic_distancec, starts_with('dist'), block, village_treatment, q123, region_x) %>% # slim down data
  rename('region' = 'region_x')

with(final_finalV2, table(village_treatment, individual_treatment_ri))


## go through the models
equation_l7 = 'vaccine_intention ~ individual_treatment_ri + gender + agec + 
                       education + incomec + employed + clinic_distancec'
L7 = run_boot(indata = final_finalV2,
               n_boot = 1000,
               equation = equation_l7)
#
equation_l8 = 'vaccine_reported_combo ~ individual_treatment_ri + gender + agec + 
                       education + incomec + employed + clinic_distancec'
L8 = run_boot(indata = final_finalV2,
                        n_boot = 1000,
              equation = equation_l8)
#
equation_l9 = 'act_vac_april ~ individual_treatment_ri + gender + agec + 
                       education + incomec + employed + clinic_distancec'
L9 = run_boot(indata = final_finalV2,
                        n_boot = 1000,
              equation = equation_l9)


## plot of histograms
L7$plot = L7$plot + ggtitle('Intention')
L8$plot = L8$plot + ggtitle('Reported')
L9$plot = L9$plot + ggtitle('Actual')
jpeg('ED_Figure02.jpg', width=5, height=5, units='in', res=500)
grid.arrange(L7$plot, L8$plot, L9$plot, nrow=3, ncol=1)
dev.off()

## Tabulate p-values
for_table = bind_rows(L7$ests, L8$ests, L9$ests, .id = 'outcome') %>%
  mutate(outcome = case_when(
    outcome == 1 ~ 'Intention',
    outcome == 2 ~ 'Reported',
    outcome == 3 ~ 'Actual'
  )) %>%
  select(outcome, rowname, observed_pval, pval_bootstrap)
library(xtable)
print(xtable(for_table, compress = FALSE, digits=4), include.rownames=FALSE, file='TableS3.tex')
